* Start from an empty dataset, turn off output paging, and enable -pause-
clear
set more off
pause on

* Close any log that is still open (ignoring the error if none is), then
* start a fresh plain-text log for this run
capture log close
log using check-election.log, replace text

******************************************************************************************
* Name: 	check-election.do
* Purpose: 	This do-file merges the dropped run-again records into the linked election data and then checks the sample
* input:	ca-election-reclink.dta
* output: 	ca-election-reclink-total.dta
******************************************************************************************

use ca-election-reclink, clear

*****************************************************************
* Merge in the dropped observations that carry run-again outcomes,
* i.e. individuals who ran twice within a given calendar year
*****************************************************************

* Seed the running output file; every yearly pass below merges into it
save ca-election-reclink-total.dta, replace

* Variable stubs that receive the year suffix followed by _runagain
local stubs baldesig raceid date place office term vote first_orig incumbent num_cand elected rank runoff occ voteshare margin totalvotes

forval x=1995/2014 {

	* Load the records dropped for year `x' and keep only the run-again ones
	use ca-local-dropped`x', clear
	keep if runagain==1
	keep FIRST LAST CNTYNAME BALDESIG raceid date place office term vote`x' first_orig incumbent num_cand elected rank runoff year occ voteshare margin runagain totalvotes

	rename BALDESIG baldesig`x'
	rename runagain year_runagain`x'

	* Tag every outcome variable so it cannot collide with the main record's
	foreach stub of local stubs {
		rename `stub'`x' `stub'`x'_runagain
	}

	* Master = run-again records, using = accumulated file
	merge 1:1 FIRST LAST CNTYNAME year`x' using ca-election-reclink-total.dta
	list FIRST LAST CNTYNAME if _merge==1

	****** discard individuals who didn't run again in same county (this is a restriction I impose on the other linkages)
	keep if _merge!=1
	drop _merge

	save ca-election-reclink-total.dta, replace

}

save ca-election-reclink-total.dta, replace

*******************************************************
* Check sample over time (county/place/# races)
*******************************************************

* count counties: check that all counties are represented in every year
* For each year, count the distinct counties that have at least one
* observation with a non-missing raceid for that year. (The count used to
* ignore the year entirely and printed the same whole-sample number 20 times.)
forval x=1995/2014 {
	* tag() flags exactly one observation per distinct CNTYNAME among the
	* observations that satisfy the if-condition; all others get 0
	egen byte county_tag = tag(CNTYNAME) if !missing(raceid`x')
	quietly count if county_tag==1
	disp "`x'"
	disp "counties with at least one race: " r(N)
	drop county_tag
}

* count unique races by county in each year
* Creates race_total_`x' = number of distinct non-missing raceid`x' values
* within each county, and lists one row per county.
forval x=1995/2014 {
	* tag() flags one observation per distinct (county, race) pair and skips
	* observations where raceid`x' is missing, so candidates who did not run
	* in year `x' no longer add a phantom "missing" race to the county total
	egen byte race_tag = tag(CNTYNAME raceid`x')
	bys CNTYNAME: egen race_total_`x' = total(race_tag)

	* marker for the first row of each county, used only for the listing
	bys CNTYNAME: gen byte county_first = (_n==1)
	disp "`x'"
	list CNTYNAME race_total_`x' if county_first==1
	drop race_tag county_first
}

* Reduce to a single row per county (the first within each CNTYNAME group)
bys CNTYNAME: keep if _n==1

* Retain only the county identifier and the yearly race counts, then print
keep CNTYNAME race_total_*

list

log close
